library(data.table)
library(lfe)
library(jtools)
library(stargazer)
library(minpack.lm)
source("nlstargazer.r")
# Read the file generated in Python
# Load the regression table and keep only the rows whose `candidate` is
# Jose Antonio Kast, using a data.table row filter on the freshly read table.
df <- fread("data_output/regression_Chile_2021.csv")[
  candidate == "JOSE ANTONIO KAST RIST"
]
# Show the filtered table.
df
# Run OLS regression models
# Three OLS specifications of `rate`, all fitted on `df`:
#   model_1 - the five `rate_*` regressors only,
#   model_2 - the five `divisiveness_*` regressors only,
#   model_3 - both sets of regressors together.
model_1 <- lm(
  rate ~ rate_parisi + rate_kast + rate_meo + rate_sichel + rate_provoste,
  data = df
)
model_2 <- lm(
  rate ~ divisiveness_parisi + divisiveness_kast + divisiveness_meo +
    divisiveness_sichel + divisiveness_provoste,
  data = df
)
model_3 <- lm(
  rate ~ rate_parisi + rate_kast + rate_meo + rate_sichel + rate_provoste +
    divisiveness_parisi + divisiveness_kast + divisiveness_meo +
    divisiveness_sichel + divisiveness_provoste,
  data = df
)
# Print the three fits side by side as a plain-text table.
stargazer(model_1, model_2, model_3, type = "text")
===============================================================================================================
                                                         Dependent variable:
                      -----------------------------------------------------------------------------------------
                                                                rate
                      (1)                           (2)                          (3)
---------------------------------------------------------------------------------------------------------------
rate_parisi           0.481***                                                   0.477***
                      (0.003)                                                    (0.004)
rate_kast             1.244***                                                   1.218***
                      (0.002)                                                    (0.003)
rate_meo              0.164***                                                   0.165***
                      (0.006)                                                    (0.006)
rate_sichel           0.871***                                                   0.893***
                      (0.004)                                                    (0.005)
rate_provoste         0.389***                                                   0.407***
                      (0.004)                                                    (0.004)
divisiveness_parisi                                 -0.414***                    0.137***
                                                    (0.040)                      (0.016)
divisiveness_kast                                   3.751***                     0.416***
                                                    (0.059)                      (0.023)
divisiveness_meo                                    -11.985***                   -0.968***
                                                    (0.146)                      (0.056)
divisiveness_sichel                                 2.131***                     0.181***
                                                    (0.103)                      (0.036)
divisiveness_provoste                               -0.317***                    -0.323***
                                                    (0.072)                      (0.027)
Constant              -0.142***                     0.514***                     -0.138***
                      (0.002)                       (0.006)                      (0.003)
---------------------------------------------------------------------------------------------------------------
Observations          46,638                        46,639                       46,638
R2                    0.907                         0.229                        0.908
Adjusted R2           0.907                         0.229                        0.908
Residual Std. Error   0.041 (df = 46632)            0.117 (df = 46633)           0.040 (df = 46627)
F Statistic           90,572.760*** (df = 5; 46632) 2,775.638*** (df = 5; 46633) 46,106.500*** (df = 10; 46627)
===============================================================================================================
Note:                 *p<0.1; **p<0.05; ***p<0.01
# Refit the three specifications with nlsLM(), writing each coefficient as an
# explicit parameter (`b_*` on the rate_* regressors, `d_*` on the
# divisiveness_* regressors, plus `const`) so that bounds can be placed on them.
# These assignments reuse the names model_1..model_3, replacing the lm() fits.
#
# model_1: rate_* regressors only; every b_* is constrained to [0, 1] and
# `const` is left unbounded. All parameters start at 0.
model_1 <- nlsLM(
formula = rate ~ const + b_parisi*rate_parisi + b_kast*rate_kast + b_meo*rate_meo + b_sichel*rate_sichel + b_provoste*rate_provoste,
data = df,
start = list(const=0, b_parisi=0, b_kast=0, b_meo=0, b_sichel=0, b_provoste=0),
lower = c(const=-Inf, b_parisi=0, b_kast=0, b_meo=0, b_sichel=0, b_provoste=0),
upper = c(const=Inf, b_parisi=1, b_kast=1, b_meo=1, b_sichel=1, b_provoste=1),
# NOTE(review): `algorithm` and `model` are passed only for this fit, not for
# model_2/model_3 - confirm that "port" is intended and what nlsLM does with it.
algorithm = "port",
model = FALSE
)
# model_2: divisiveness_* regressors only; no `lower`/`upper` are supplied, so
# the d_* parameters and `const` are unconstrained.
model_2 <- nlsLM(
formula = rate ~ const + d_parisi*divisiveness_parisi + d_kast*divisiveness_kast + d_meo*divisiveness_meo + d_sichel*divisiveness_sichel + d_provoste*divisiveness_provoste,
data = df,
start = list(const=0, d_parisi=0, d_kast=0, d_meo=0, d_sichel=0, d_provoste=0)
)
# model_3: both regressor sets; b_* constrained to [0, 1], while d_* and
# `const` get infinite bounds (i.e. are unconstrained).
model_3 <- nlsLM(
formula = rate ~ const + b_parisi*rate_parisi + b_kast*rate_kast + b_meo*rate_meo + b_sichel*rate_sichel + b_provoste*rate_provoste + d_parisi*divisiveness_parisi + d_kast*divisiveness_kast + d_meo*divisiveness_meo + d_sichel*divisiveness_sichel + d_provoste*divisiveness_provoste,
data = df,
start = list(const=0, b_parisi=0, b_kast=0, b_meo=0, b_sichel=0, b_provoste=0, d_parisi=0, d_kast=0, d_meo=0, d_sichel=0, d_provoste=0),
lower = c(const=-Inf, b_parisi=0, b_kast=0, b_meo=0, b_sichel=0, b_provoste=0, d_parisi=-Inf, d_kast=-Inf, d_meo=-Inf, d_sichel=-Inf, d_provoste=-Inf),
upper = c(const=Inf, b_parisi=1, b_kast=1, b_meo=1, b_sichel=1, b_provoste=1, d_parisi=Inf, d_kast=Inf, d_meo=Inf, d_sichel=Inf, d_provoste=Inf)
)
# Summarise the three fits in one table. `nlstargazer` is not defined in this
# file; presumably it comes from the sourced "nlstargazer.r" - TODO confirm.
models <- list(model_1, model_2, model_3)
nlstargazer(models = models)
| Parameters | Model.1 | Model.2 | Model.3 |
|---|---|---|---|
| b_kast | 1.0000*** | | 1.0000*** |
| b_meo | 0.0000 | | 0.0000 |
| b_parisi | 0.3670*** | | 0.3585*** |
| b_provoste | 0.3107*** | | 0.3258*** |
| b_sichel | 0.8092*** | | 0.9105*** |
| const | -0.0263*** | 0.5140*** | -0.0303*** |
| d_kast | | 3.7511*** | 0.9900*** |
| d_meo | | -11.9850*** | -2.4661*** |
| d_parisi | | -0.4138*** | 0.2355*** |
| d_provoste | | -0.3169*** | -0.3763*** |
| d_sichel | | 2.1307*** | 0.0855** |
| Residual sum-of-squares | 96.46 | 633.17 | 84.9 |
NA
# Read the first-round file, total `value` per candidate, and express each
# total as a share (`rate`) of the grand total.
data1r <- fread("data_output/Chile/2021_first_round.csv.gz")
data_1r <- data1r[, .(value = sum(value)), by = candidate][
  , .(candidate, value, rate = value / sum(value))
]
data_1r
# Names of the candidates whose overall share is above 2%.
candidates <- data_1r[rate > 0.02, candidate]
candidates
[1] "GABRIEL BORIC FONT" "JOSE ANTONIO KAST RIST" "YASNA PROVOSTE CAMPILLAY" "SEBASTIAN SICHEL RAMIREZ"
[5] "MARCO ENRIQUEZ-OMINAMI GUMUCIO" "FRANCO PARISI FERNANDEZ"
# Pair each first-round candidate with the model_1 coefficient fitted for them,
# then attach the first-round totals and shares from `data_1r`.
coefs <- coef(model_1)
coef_param_by_candidate <- c(
  "JOSE ANTONIO KAST RIST" = "b_kast",
  "YASNA PROVOSTE CAMPILLAY" = "b_provoste",
  "SEBASTIAN SICHEL RAMIREZ" = "b_sichel",
  "MARCO ENRIQUEZ-OMINAMI GUMUCIO" = "b_meo",
  "FRANCO PARISI FERNANDEZ" = "b_parisi"
)
df_model <- data.table(
  candidate = names(coef_param_by_candidate),
  coef = vapply(
    coef_param_by_candidate,
    function(param) coefs[[param]],
    numeric(1),
    USE.NAMES = FALSE
  )
)
# Full outer join: candidates without a coefficient are kept with coef = NA.
df_model <- merge(df_model, data_1r, by = "candidate", all = TRUE)
# Split each candidate's share into the part scaled by the coefficient
# (`to_candidate_a`) and the complementary part (`to_candidate_b`).
df_model[["1_coef"]] <- 1 - df_model[["coef"]]
df_model[["to_candidate_a"]] <- with(df_model, rate * coef)
df_model[["to_candidate_b"]] <- df_model[["rate"]] * df_model[["1_coef"]]
df_model
draw_sankey(df_model, candidates)
[1] "FRANCO PARISI FERNANDEZ"
[1] "JOSE ANTONIO KAST RIST"
[1] "MARCO ENRIQUEZ-OMINAMI GUMUCIO"
[1] "SEBASTIAN SICHEL RAMIREZ"
[1] "YASNA PROVOSTE CAMPILLAY"
[1] "#0D0066"
# Build the Sankey link vectors `source`, `target` and `value` from `df_model`.
#
# Node indices are zero-based positions in `candidates`; two extra nodes are
# placed right after them: `idx_candidate_a` (BORIC) and `idx_candidate_b`
# (KAST). For every candidate with a fitted coefficient two links are emitted,
# in this order: the `to_candidate_b` share to the BORIC node, then the
# `to_candidate_a` share to the KAST node (`to_candidate_a` is rate * coef).
# Gabriel Boric has no coefficient: his whole share goes to the BORIC node and
# a zero-valued link goes to the KAST node. Values are scaled by 100.
#
# The result names `source`, `target` and `value` are kept as-is because the
# plotting code further down reads them.
idx_candidate_a <- length(candidates) + 0
idx_candidate_b <- length(candidates) + 1

# Collect per-row pieces in preallocated lists instead of growing vectors;
# rows that emit nothing stay NULL and vanish in unlist().
n_rows <- nrow(df_model)
link_source <- vector("list", n_rows)
link_target <- vector("list", n_rows)
link_value <- vector("list", n_rows)

# seq_len() makes the loop a no-op for an empty table (1:0 would run twice).
for (i in seq_len(n_rows)) {
  row <- df_model[i, ]
  candidate <- row[["candidate"]]
  idx <- match(candidate, candidates) - 1

  # A candidate that is not one of the displayed nodes cannot be a link
  # endpoint; skip it rather than pushing an NA index into the link vectors.
  if (is.na(idx)) {
    if (!is.na(row[["coef"]])) {
      warning(
        "Skipping links for '", candidate, "': not present in `candidates`.",
        call. = FALSE
      )
    }
    next
  }

  if (!is.na(row[["coef"]])) {
    print(candidate)
    link_source[[i]] <- c(idx, idx)
    link_target[[i]] <- c(idx_candidate_a, idx_candidate_b) # BORIC, KAST
    link_value[[i]] <- c(row[["to_candidate_b"]], row[["to_candidate_a"]]) * 100
  } else if (isTRUE(candidate == "GABRIEL BORIC FONT")) {
    link_source[[i]] <- c(idx, idx)
    link_target[[i]] <- c(idx_candidate_a, idx_candidate_b) # BORIC, KAST
    link_value[[i]] <- c(row[["rate"]] * 100, 0)
  }
}

source <- unlist(link_source)
target <- unlist(link_target)
value <- unlist(link_value)
[1] "FRANCO PARISI FERNANDEZ"
[1] "JOSE ANTONIO KAST RIST"
[1] "MARCO ENRIQUEZ-OMINAMI GUMUCIO"
[1] "SEBASTIAN SICHEL RAMIREZ"
[1] "YASNA PROVOSTE CAMPILLAY"
# rjson supplies the JSON reader used below.
library(rjson)
# Parse consts.json into `colors`; it is indexed by candidate name below.
colors <- rjson::fromJSON(file = "consts.json")
# Quick check: show the entry stored for Franco Parisi.
print(colors[["FRANCO PARISI FERNANDEZ"]])
[1] "#0D0066"
# Node labels for the Sankey diagram: the first-round candidates followed by
# the two destination nodes (Boric and Kast).
label <- c(candidates, c("GABRIEL BORIC FONT", "JOSE ANTONIO KAST RIST"))

# Look up one colour per label in `colors`. A label with no entry used to be
# dropped silently, which shifted every following colour onto the wrong node;
# such labels now get a neutral grey and a warning so the vector stays aligned
# with `label`.
fallback_color <- "#cccccc"
missing_colors <- setdiff(label, names(colors))
if (length(missing_colors) > 0) {
  warning(
    "No colour defined for: ", paste(missing_colors, collapse = ", "),
    "; using ", fallback_color, ".",
    call. = FALSE
  )
}
color_label <- vapply(
  label,
  function(name) {
    node_color <- colors[[name]]
    if (is.null(node_color)) fallback_color else node_color
  },
  character(1),
  USE.NAMES = FALSE
)
color_label
[1] "#E71455" "#22446d" "#870C7C" "#E7DD14" "#F429E3" "#0D0066" "#E71455" "#22446d"
library(plotly)

# Node appearance: one entry per element of `label`, coloured by `color_label`,
# with the node outline width set to 0.
sankey_nodes <- list(
  label = label,
  color = color_label,
  pad = 15,
  thickness = 20,
  line = list(color = "black", width = 0)
)

# Links between nodes, all drawn in the same light grey.
sankey_links <- list(
  source = source,
  target = target,
  value = value,
  color = "#ebebeb"
)

# Horizontal Sankey diagram built from the node and link specifications.
fig <- plot_ly(
  type = "sankey",
  orientation = "h",
  node = sankey_nodes,
  link = sankey_links
)

# Empty title and a 16pt font for the whole figure.
fig <- plotly::layout(fig, title = "", font = list(size = 16))
fig